Imports:
import json, os, zipfile
import pandas as pd
import numpy as np
import geopandas as gpd
from ipyleaflet import (Map, GeoData, basemaps, WidgetControl, GeoJSON,
LayersControl, Icon, Marker,basemap_to_tiles, Choropleth,
MarkerCluster, Heatmap,SearchControl,
FullScreenControl, SplitMapControl)
from branca.colormap import linear
from ipywidgets import Text, HTML
Extract EPC data for Leeds only:
full_epc_data = zipfile.ZipFile('data/raw/England-and-Wales-EPC-All.zip', 'r')
lad_code = 'E08000035'
file_to_extract = [x for x in full_epc_data.namelist() if lad_code in x and 'certificates.csv' in x][0]
if os.path.exists(f'data/extracts/{file_to_extract}'):
extracted_data_path = f'data/extracts/{file_to_extract}'
else:
extracted_data_path = full_epc_data.extract(file_to_extract, path='data/extracts/')
Read in EPC data for Leeds:
epc_data = pd.read_csv(extracted_data_path, low_memory=False)
epc_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 291914 entries, 0 to 291913 Data columns (total 90 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 LMK_KEY 291914 non-null object 1 ADDRESS1 291914 non-null object 2 ADDRESS2 118730 non-null object 3 ADDRESS3 13970 non-null object 4 POSTCODE 291914 non-null object 5 BUILDING_REFERENCE_NUMBER 291914 non-null int64 6 CURRENT_ENERGY_RATING 291914 non-null object 7 POTENTIAL_ENERGY_RATING 291914 non-null object 8 CURRENT_ENERGY_EFFICIENCY 291914 non-null int64 9 POTENTIAL_ENERGY_EFFICIENCY 291914 non-null int64 10 PROPERTY_TYPE 291914 non-null object 11 BUILT_FORM 291914 non-null object 12 INSPECTION_DATE 291914 non-null object 13 LOCAL_AUTHORITY 291914 non-null object 14 CONSTITUENCY 291914 non-null object 15 COUNTY 1 non-null object 16 LODGEMENT_DATE 291914 non-null object 17 TRANSACTION_TYPE 291914 non-null object 18 ENVIRONMENT_IMPACT_CURRENT 291914 non-null int64 19 ENVIRONMENT_IMPACT_POTENTIAL 291914 non-null int64 20 ENERGY_CONSUMPTION_CURRENT 291914 non-null int64 21 ENERGY_CONSUMPTION_POTENTIAL 291914 non-null float64 22 CO2_EMISSIONS_CURRENT 291914 non-null float64 23 CO2_EMISS_CURR_PER_FLOOR_AREA 291914 non-null float64 24 CO2_EMISSIONS_POTENTIAL 291914 non-null float64 25 LIGHTING_COST_CURRENT 291914 non-null float64 26 LIGHTING_COST_POTENTIAL 291914 non-null float64 27 HEATING_COST_CURRENT 291914 non-null float64 28 HEATING_COST_POTENTIAL 291914 non-null float64 29 HOT_WATER_COST_CURRENT 291914 non-null float64 30 HOT_WATER_COST_POTENTIAL 291914 non-null float64 31 TOTAL_FLOOR_AREA 291914 non-null float64 32 ENERGY_TARIFF 291914 non-null object 33 MAINS_GAS_FLAG 264109 non-null object 34 FLOOR_LEVEL 291914 non-null object 35 FLAT_TOP_STOREY 74527 non-null object 36 FLAT_STOREY_COUNT 26851 non-null float64 37 MAIN_HEATING_CONTROLS 266346 non-null object 38 MULTI_GLAZE_PROPORTION 263774 non-null float64 39 GLAZED_TYPE 291914 non-null object 40 GLAZED_AREA 291914 non-null object 41 EXTENSION_COUNT 265466 non-null float64 42 NUMBER_HABITABLE_ROOMS 265466 non-null float64 43 NUMBER_HEATED_ROOMS 265466 non-null float64 44 LOW_ENERGY_LIGHTING 281490 non-null float64 45 NUMBER_OPEN_FIREPLACES 285720 non-null float64 46 HOTWATER_DESCRIPTION 291914 non-null object 47 HOT_WATER_ENERGY_EFF 291198 non-null object 48 HOT_WATER_ENV_EFF 291157 non-null object 49 FLOOR_DESCRIPTION 291889 non-null object 50 FLOOR_ENERGY_EFF 120452 non-null object 51 FLOOR_ENV_EFF 16789 non-null object 52 WINDOWS_DESCRIPTION 291799 non-null object 53 WINDOWS_ENERGY_EFF 291646 non-null object 54 WINDOWS_ENV_EFF 291646 non-null object 55 WALLS_DESCRIPTION 291912 non-null object 56 WALLS_ENERGY_EFF 291643 non-null object 57 WALLS_ENV_EFF 291643 non-null object 58 SECONDHEAT_DESCRIPTION 291914 non-null object 59 SHEATING_ENERGY_EFF 0 non-null float64 60 SHEATING_ENV_EFF 0 non-null float64 61 ROOF_DESCRIPTION 291715 non-null object 62 ROOF_ENERGY_EFF 234975 non-null object 63 ROOF_ENV_EFF 234975 non-null object 64 MAINHEAT_DESCRIPTION 291914 non-null object 65 MAINHEAT_ENERGY_EFF 291156 non-null object 66 MAINHEAT_ENV_EFF 291159 non-null object 67 MAINHEATCONT_DESCRIPTION 291914 non-null object 68 MAINHEATC_ENERGY_EFF 291449 non-null object 69 MAINHEATC_ENV_EFF 291449 non-null object 70 LIGHTING_DESCRIPTION 291914 non-null object 71 LIGHTING_ENERGY_EFF 291505 non-null object 72 LIGHTING_ENV_EFF 291612 non-null object 73 MAIN_FUEL 291914 non-null object 74 WIND_TURBINE_COUNT 272060 non-null float64 75 HEAT_LOSS_CORRIDOOR 291914 non-null object 76 UNHEATED_CORRIDOR_LENGTH 32490 non-null float64 77 FLOOR_HEIGHT 111261 non-null float64 78 PHOTO_SUPPLY 161373 non-null float64 79 SOLAR_WATER_HEATING_FLAG 175187 non-null object 80 MECHANICAL_VENTILATION 291914 non-null object 81 ADDRESS 291914 non-null object 82 LOCAL_AUTHORITY_LABEL 291914 non-null object 83 CONSTITUENCY_LABEL 291914 non-null object 84 POSTTOWN 291914 non-null object 85 CONSTRUCTION_AGE_BAND 289873 non-null object 86 LODGEMENT_DATETIME 291914 non-null object 87 TENURE 285202 non-null object 88 FIXED_LIGHTING_OUTLETS_COUNT 114067 non-null float64 89 LOW_ENERGY_FIXED_LIGHT_COUNT 114066 non-null float64 dtypes: float64(26), int64(6), object(58) memory usage: 200.4+ MB
Drop unneccessary columns:
epc_data.drop(columns=['ADDRESS1', 'ADDRESS2', 'ADDRESS3', 'COUNTY', 'ADDRESS', 'LOCAL_AUTHORITY_LABEL', 'CONSTITUENCY_LABEL', 'POSTTOWN'], inplace=True)
Grab ONS Postcode Lookup file (2020):
![ ! -f data/ONS-Postcode-Lookup.zip ] && wget https://www.arcgis.com/sharing/rest/content/items/b982ad8161dd4fc29bbe76038ae32c10/data -O 'data/ONS-Postcode-Lookup.zip'
Read in the ONS Postcode Lookup file & drop unnecessary columns:
pcd_lookup = pd.read_csv('data/ONS-Postcode-Lookup.zip', compression='zip', usecols=['pcds', 'lsoa11cd', 'ladcd'], low_memory=False)
pcd_lookup.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2647046 entries, 0 to 2647045 Data columns (total 3 columns): # Column Dtype --- ------ ----- 0 pcds object 1 lsoa11cd object 2 ladcd object dtypes: object(3) memory usage: 60.6+ MB
Merge the EPC data with the ONS postcode lookup (removing any entries with invalid postcodes):
epc_data['POSTCODE'] = epc_data['POSTCODE'].str.replace('\s*', '').str.upper()
pcd_lookup['POSTCODE'] = pcd_lookup['pcds'].str.replace('\s*', '').str.upper()
count = len(epc_data)
epc_data = epc_data.merge(pcd_lookup, on='POSTCODE', how='inner')
print(f'Dropped {count - len(epc_data)} rows with invalid postcodes')
Dropped 125 rows with invalid postcodes
Drop rows where the LSOA isn't in Leeds:
leeds_lsoa_codes = pcd_lookup[pcd_lookup['ladcd'] == lad_code]['lsoa11cd'].drop_duplicates()
epc_data = epc_data[epc_data['lsoa11cd'].isin(leeds_lsoa_codes)]
Sort by EPC date and remove outdated EPC's (ie. if a property has multiple EPC's)
epc_data['LODGEMENT_DATE'] = pd.to_datetime(epc_data['LODGEMENT_DATE'], infer_datetime_format=True, errors='coerce')
epc_data['INSPECTION_DATE'] = pd.to_datetime(epc_data['INSPECTION_DATE'], infer_datetime_format=True, errors='coerce')
epc_data.sort_values(by=['INSPECTION_DATE', 'LODGEMENT_DATE'], ascending=False, inplace=True)
count = len(epc_data)
epc_data.drop_duplicates(subset=['POSTCODE', 'BUILDING_REFERENCE_NUMBER'], keep='first', ignore_index=True, inplace=True)
print(f"Dropped {count - len(epc_data)} outdated EPC's")
Dropped 67987 outdated EPC's
Create a table to store output:
lsoa_summary = pd.DataFrame(index=leeds_lsoa_codes)
Calculate average current & potential EPC ratings by LSOA:
mode = lambda x: pd.Series.mode(x)[0]
lsoa_summary['CURRENT_EPC_RATING_MODE'] = epc_data.groupby('lsoa11cd')['CURRENT_ENERGY_RATING'].agg(mode)
lsoa_summary['POTENTIAL_EPC_RATING_MODE'] = epc_data.groupby('lsoa11cd')['POTENTIAL_ENERGY_RATING'].agg(mode)
lsoa_summary.info()
<class 'pandas.core.frame.DataFrame'> Index: 482 entries, E01011527 to E01011500 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 CURRENT_EPC_RATING_MODE 482 non-null object 1 POTENTIAL_EPC_RATING_MODE 482 non-null object dtypes: object(2) memory usage: 11.3+ KB
Standardise age band naming:
epc_data['CONSTRUCTION_AGE_BAND'].unique()
array(['England and Wales: before 1900', 'England and Wales: 2003-2006',
'England and Wales: 1900-1929', 'England and Wales: 1930-1949',
'England and Wales: 1950-1966', 'England and Wales: 2007 onwards',
'England and Wales: 1967-1975', 'NO DATA!',
'England and Wales: 1991-1995', 'England and Wales: 1976-1982',
'England and Wales: 1996-2002', 'England and Wales: 1983-1990',
'INVALID!', nan], dtype=object)
epc_data['CONSTRUCTION_AGE_BAND'].replace({'NO DATA!': np.nan, 'INVALID!': np.nan}, inplace=True)
epc_data['CONSTRUCTION_AGE_BAND'] = epc_data['CONSTRUCTION_AGE_BAND'].str.lstrip('England and Wales: ')
epc_data['CONSTRUCTION_AGE_BAND'].replace({'before 1900': 'PRE-1900', '2007 onwards': '2007-2020'}, inplace=True)
epc_data['CONSTRUCTION_AGE_BAND'].unique()
array(['PRE-1900', '2003-2006', '1900-1929', '1930-1949', '1950-1966',
'2007-2020', '1967-1975', nan, '1991-1995', '1976-1982',
'1996-2002', '1983-1990'], dtype=object)
Get the number of properties in each age band by LSOA:
epc_age_bands_by_lsoa = epc_data[['lsoa11cd', 'CONSTRUCTION_AGE_BAND']].fillna('UNKNOWN').pivot_table(index=['lsoa11cd'], columns='CONSTRUCTION_AGE_BAND', aggfunc='size', fill_value=0)
epc_age_bands_by_lsoa
| CONSTRUCTION_AGE_BAND | 1900-1929 | 1930-1949 | 1950-1966 | 1967-1975 | 1976-1982 | 1983-1990 | 1991-1995 | 1996-2002 | 2003-2006 | 2007-2020 | PRE-1900 | UNKNOWN |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| lsoa11cd | ||||||||||||
| E01011264 | 3 | 69 | 116 | 41 | 3 | 0 | 1 | 0 | 0 | 0 | 16 | 4 |
| E01011265 | 110 | 38 | 14 | 47 | 17 | 34 | 9 | 9 | 56 | 10 | 163 | 168 |
| E01011266 | 34 | 83 | 45 | 36 | 11 | 6 | 5 | 8 | 93 | 134 | 13 | 79 |
| E01011267 | 22 | 45 | 158 | 44 | 11 | 5 | 10 | 14 | 13 | 12 | 32 | 94 |
| E01011268 | 0 | 70 | 203 | 27 | 41 | 29 | 3 | 3 | 1 | 0 | 0 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| E01033031 | 24 | 9 | 88 | 21 | 1 | 0 | 1 | 15 | 62 | 327 | 8 | 332 |
| E01033032 | 7 | 8 | 65 | 87 | 5 | 21 | 4 | 44 | 255 | 267 | 2 | 858 |
| E01033033 | 0 | 1 | 75 | 7 | 1 | 0 | 1 | 10 | 198 | 252 | 0 | 1 |
| E01033034 | 5 | 0 | 0 | 0 | 0 | 6 | 9 | 154 | 287 | 325 | 18 | 221 |
| E01033035 | 2 | 2 | 41 | 51 | 0 | 7 | 2 | 20 | 0 | 0 | 0 | 587 |
482 rows × 12 columns
Calculate total number of properties and mode age band by LSOA:
lsoa_summary[['EPC_PROPERTY_COUNT', 'EPC_PROPERTY_AGE_BAND_MODE']] = epc_age_bands_by_lsoa.agg(['sum', lambda row: row.idxmax(axis=1)], axis=1)
lsoa_summary
| CURRENT_EPC_RATING_MODE | POTENTIAL_EPC_RATING_MODE | EPC_PROPERTY_COUNT | EPC_PROPERTY_AGE_BAND_MODE | |
|---|---|---|---|---|
| lsoa11cd | ||||
| E01011527 | D | B | 352 | 1967-1975 |
| E01011589 | D | B | 461 | PRE-1900 |
| E01011457 | D | C | 337 | PRE-1900 |
| E01011521 | D | B | 589 | 2003-2006 |
| E01011529 | D | C | 285 | 1950-1966 |
| ... | ... | ... | ... | ... |
| E01011537 | D | B | 479 | 1930-1949 |
| E01011551 | D | B | 421 | 1900-1929 |
| E01011499 | C | B | 940 | UNKNOWN |
| E01032496 | C | B | 601 | 2003-2006 |
| E01011500 | D | B | 355 | 1950-1966 |
482 rows × 4 columns
Get VOA 2020 Housing Stock data to compare:
voa_housing_stock_data = pd.read_csv('https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/920382/Table_CTSOP4_1_2020.csv', encoding='cp1252', low_memory=False)
voa_housing_stock_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 383404 entries, 0 to 383403 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 GEOGRAPHY 383404 non-null object 1 BA_CODE 3053 non-null float64 2 ECODE 383404 non-null object 3 AREA_NAME 383404 non-null object 4 band 383368 non-null object 5 BP_PRE_1900 383368 non-null object 6 BP_1900_1918 383368 non-null object 7 BP_1919_1929 383368 non-null object 8 BP_1930_1939 383368 non-null object 9 BP_1945_1954 383368 non-null object 10 BP_1955_1964 383368 non-null object 11 BP_1965_1972 383368 non-null object 12 BP_1973_1982 383368 non-null object 13 BP_1983_1992 383368 non-null object 14 BP_1993_1999 383368 non-null object 15 BP_2000_2008 383368 non-null object 16 BP_2009_2011 383368 non-null object 17 BP_2012_2014 383368 non-null object 18 BP_2015_2017 383368 non-null object 19 BP_2018_2020 383368 non-null object 20 BP_UNKNOWN 383368 non-null object 21 ALL_PROPERTIES 383368 non-null object dtypes: float64(1), object(21) memory usage: 64.4+ MB
Filter to Leeds LSOA's only:
voa_housing_stock_data = voa_housing_stock_data[(voa_housing_stock_data['band'] == 'All') & (voa_housing_stock_data['ECODE'].isin(leeds_lsoa_codes))]
voa_housing_stock_data.set_index('ECODE', inplace=True)
voa_housing_stock_data.index.name = 'lsoa11cd'
Drop unnecessary columns and standardise age band naming:
columns_to_keep = [x for x in voa_housing_stock_data.columns if x.startswith('BP')]
voa_housing_stock_data.drop(columns=voa_housing_stock_data.columns.difference(columns_to_keep), inplace=True)
voa_housing_stock_data.rename(columns=lambda x: x.replace('BP_', '').replace('_', '-'), inplace=True)
Convert column datatypes to numerical:
voa_housing_stock_data = voa_housing_stock_data.apply(pd.to_numeric, errors='coerce').fillna(0)
Calculate total properties and mode age band by LSOA:
lsoa_summary[['VOA_PROPERTY_COUNT', 'VOA_PROPERTY_AGE_BAND_MODE']] = voa_housing_stock_data.agg(['sum', lambda row: row.idxmax(axis=1)], axis=1)
lsoa_summary
| CURRENT_EPC_RATING_MODE | POTENTIAL_EPC_RATING_MODE | EPC_PROPERTY_COUNT | EPC_PROPERTY_AGE_BAND_MODE | VOA_PROPERTY_COUNT | VOA_PROPERTY_AGE_BAND_MODE | |
|---|---|---|---|---|---|---|
| lsoa11cd | ||||||
| E01011527 | D | B | 352 | 1967-1975 | 780 | 1965-1972 |
| E01011589 | D | B | 461 | PRE-1900 | 730 | PRE-1900 |
| E01011457 | D | C | 337 | PRE-1900 | 650 | PRE-1900 |
| E01011521 | D | B | 589 | 2003-2006 | 1040 | 2000-2008 |
| E01011529 | D | C | 285 | 1950-1966 | 590 | 1973-1982 |
| ... | ... | ... | ... | ... | ... | ... |
| E01011537 | D | B | 479 | 1930-1949 | 1010 | 1965-1972 |
| E01011551 | D | B | 421 | 1900-1929 | 750 | PRE-1900 |
| E01011499 | C | B | 940 | UNKNOWN | 1290 | 2000-2008 |
| E01032496 | C | B | 601 | 2003-2006 | 970 | 2000-2008 |
| E01011500 | D | B | 355 | 1950-1966 | 720 | 1955-1964 |
482 rows × 6 columns
lsoa_summary.info()
<class 'pandas.core.frame.DataFrame'> Index: 482 entries, E01011527 to E01011500 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 CURRENT_EPC_RATING_MODE 482 non-null object 1 POTENTIAL_EPC_RATING_MODE 482 non-null object 2 EPC_PROPERTY_COUNT 482 non-null object 3 EPC_PROPERTY_AGE_BAND_MODE 482 non-null object 4 VOA_PROPERTY_COUNT 482 non-null object 5 VOA_PROPERTY_AGE_BAND_MODE 482 non-null object dtypes: object(6) memory usage: 26.4+ KB
lsoa_summary['EPC_PROPERTY_COUNT'] = pd.to_numeric(lsoa_summary['EPC_PROPERTY_COUNT'], downcast='integer')
lsoa_summary['VOA_PROPERTY_COUNT'] = pd.to_numeric(lsoa_summary['VOA_PROPERTY_COUNT'], downcast='integer')
Calculate percentage of total properties with a published EPC:
lsoa_summary['PROPERTIES_WITH_EPC_PERC'] = (lsoa_summary['EPC_PROPERTY_COUNT'] / lsoa_summary['VOA_PROPERTY_COUNT']).round(2)
lsoa_summary
| CURRENT_EPC_RATING_MODE | POTENTIAL_EPC_RATING_MODE | EPC_PROPERTY_COUNT | EPC_PROPERTY_AGE_BAND_MODE | VOA_PROPERTY_COUNT | VOA_PROPERTY_AGE_BAND_MODE | PROPERTIES_WITH_EPC_PERC | |
|---|---|---|---|---|---|---|---|
| lsoa11cd | |||||||
| E01011527 | D | B | 352 | 1967-1975 | 780 | 1965-1972 | 0.45 |
| E01011589 | D | B | 461 | PRE-1900 | 730 | PRE-1900 | 0.63 |
| E01011457 | D | C | 337 | PRE-1900 | 650 | PRE-1900 | 0.52 |
| E01011521 | D | B | 589 | 2003-2006 | 1040 | 2000-2008 | 0.57 |
| E01011529 | D | C | 285 | 1950-1966 | 590 | 1973-1982 | 0.48 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| E01011537 | D | B | 479 | 1930-1949 | 1010 | 1965-1972 | 0.47 |
| E01011551 | D | B | 421 | 1900-1929 | 750 | PRE-1900 | 0.56 |
| E01011499 | C | B | 940 | UNKNOWN | 1290 | 2000-2008 | 0.73 |
| E01032496 | C | B | 601 | 2003-2006 | 970 | 2000-2008 | 0.62 |
| E01011500 | D | B | 355 | 1950-1966 | 720 | 1955-1964 | 0.49 |
482 rows × 7 columns
Sort by LSOA and save as CSV file:
lsoa_summary.sort_index().to_csv('output/leeds-lsoa-summary.csv', index_label='lsoa11cd')
Grab the LSOA 2011 Boundaries Shapefile from the ONS and filter to Leeds only:
lsoa_geo = gpd.read_file('https://opendata.arcgis.com/datasets/c892586698ad4d268f9288f1df20ab77_0.zip?outSR=%7B%22latestWkid%22%3A27700%2C%22wkid%22%3A27700%7D')
lsoa_geo = lsoa_geo[lsoa_geo['LSOA11CD'].isin(leeds_lsoa_codes)]
lsoa_geo.drop(columns=lsoa_geo.columns.difference(['LSOA11CD', 'LSOA11NM', 'geometry']), inplace=True)
lsoa_geo.to_crs(epsg=4326, inplace=True)
Merge the LSOA boundaries table with our LSOA summary table to create a geographic summary:
lsoa_geo = lsoa_geo.merge(lsoa_summary, left_on='LSOA11CD', right_on='lsoa11cd', how='inner')
lsoa_geo.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 482 entries, 0 to 481 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 LSOA11CD 482 non-null object 1 LSOA11NM 482 non-null object 2 geometry 482 non-null geometry 3 CURRENT_EPC_RATING_MODE 482 non-null object 4 POTENTIAL_EPC_RATING_MODE 482 non-null object 5 EPC_PROPERTY_COUNT 482 non-null int16 6 EPC_PROPERTY_AGE_BAND_MODE 482 non-null object 7 VOA_PROPERTY_COUNT 482 non-null int16 8 VOA_PROPERTY_AGE_BAND_MODE 482 non-null object 9 PROPERTIES_WITH_EPC_PERC 482 non-null float64 dtypes: float64(1), geometry(1), int16(2), object(6) memory usage: 35.8+ KB
Save as GeoJSON:
lsoa_geo.to_file('output/leeds-lsoa-summary.geojson', driver='GeoJSON')
Load the saved GeoJSON and create some maps:
with open('output/leeds-lsoa-summary.geojson', 'r') as fp:
lsoa_geojson = json.load(fp)
for feature in lsoa_geojson['features']:
feature.update(id=feature['properties']['LSOA11CD'])
lsoa_properties_with_epc = dict(zip(lsoa_geo['LSOA11CD'].tolist(), lsoa_geo['PROPERTIES_WITH_EPC_PERC'].tolist()))
lsoa_map = Map(basemap=basemaps.CartoDB.Positron, center=[53.8030441,-1.5386524], zoom=10.5)
# lsoa_map.add_layer(layer)
choro = Choropleth(
geo_data=lsoa_geojson,
choro_data=lsoa_properties_with_epc,
value_min=0,
value_max=1,
colormap=linear.viridis
)
lsoa_map.add_layer(choro)
lsoa_map
def create_linear_choropleth(key):
choro_data = dict(zip(lsoa_geo['LSOA11CD'].tolist(), lsoa_geo[key].tolist()))
return Choropleth(
name=key,
geo_data=lsoa_geojson,
choro_data=choro_data,
colormap=linear.viridis,
value_min=lsoa_geo[key].min(),
value_max=lsoa_geo[key].max()
)
def create_epc_rating_choropleth(key):
unique_ratings = lsoa_geo[key].sort_values(ascending=False).unique().tolist()
epc_rating_values = lsoa_geo[key].apply(lambda x: unique_ratings.index(x))
choro_data = dict(zip(lsoa_geo['LSOA11CD'].tolist(), epc_rating_values.tolist()))
return Choropleth(
name=key,
geo_data=lsoa_geojson,
choro_data=choro_data,
colormap=linear.RdYlGn_03.scale(0, len(unique_ratings) - 1).to_step(index=[*range(0, len(unique_ratings))]),
style={'opacity': 0, 'fillOpacity': 0.65}
)
def create_age_band_choropleth(key):
age_bands = lsoa_geo[key].apply(lambda x: int(x[-4:]) if x[-1].isdigit() else 0)
unique_age_bands = age_bands.sort_values().unique().tolist()
color_scale = linear.viridis.scale(1900, 2020).to_step(index=unique_age_bands)
return Choropleth(
name=key,
geo_data=lsoa_geojson,
choro_data=dict(zip(lsoa_geo['LSOA11CD'].tolist(), age_bands.tolist())),
colormap=color_scale
)
epc_current_rating_layer = create_epc_rating_choropleth('CURRENT_EPC_RATING_MODE')
epc_potential_rating_layer = create_epc_rating_choropleth('POTENTIAL_EPC_RATING_MODE')
epc_property_count_layer = create_linear_choropleth('EPC_PROPERTY_COUNT')
voa_property_count_layer = create_linear_choropleth('VOA_PROPERTY_COUNT')
epc_age_band_layer = create_age_band_choropleth('EPC_PROPERTY_AGE_BAND_MODE')
voa_age_band_layer = create_age_band_choropleth('VOA_PROPERTY_AGE_BAND_MODE')
epc_rating_map = Map(basemap=basemaps.CartoDB.Positron, center=[53.8030441,-1.5386524], zoom=10.5)
epc_rating_choropleth = create_epc_rating_choropleth('CURRENT_EPC_RATING_MODE')
lsoa_boundaries = GeoData(geo_dataframe=lsoa_geo, style={'color': 'black', 'weight': 0.2, 'opacity': 1, 'fillOpacity': 0})
html = HTML('Hover over an LSOA')
html.layout.margin = '0px 20px 20px 20px'
control = WidgetControl(widget=html, position='topright')
epc_rating_map.add_control(control)
def update_html(feature, **kwargs):
html.value = f"<h3>{feature['properties']['LSOA11NM']}</h3><p><b>Mode EPC Rating:</b> {feature['properties']['CURRENT_EPC_RATING_MODE']}</p>"
lsoa_boundaries.on_hover(update_html)
epc_rating_map.add_layer(epc_rating_choropleth)
epc_rating_map.add_layer(lsoa_boundaries)
epc_rating_map